22 May, 2017

suppressWarnings(library(tidyverse))
suppressWarnings(library(magrittr))
suppressWarnings(library(knitr))
suppressWarnings(library(broom))
suppressWarnings(library(arsRtools))

Deeptools to R

This part is not run here, just shown for Documentation. The resulting tidy data frame has been saved and is loaded in the next section.
code not run only shown for documentation

#!/bin/sh

cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws

bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/RefSeqGRCh37.3/ref_GRCh37.p5_top_level_geq3exons_NMq1exon1.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS exon1 "eGFP" "Ars2,Cbp80,Z18" \
"deeptools_out/Refseqv3q1_exon1_TSS_sensitivity" "--binSize=50 --numberOfProcessors=4"


cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws/Meola_RNAseq_bws/

bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/RefSeqGRCh37.3/ref_GRCh37.p5_top_level_geq3exons_NMq1exon1.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS exon1 "EGFP" "RRP40" \
"deeptools_out/Refseqv3q1_exon1_TSS_sensitivity" "--binSize=50 --numberOfProcessors=4"

matrix files for iasillo: /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Refseqv3q1_exon1_TSS_sensitivity_joined.gz
matrix files for meola /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Refseqv3q1_exon1_TSS_sensitivity_joined.gz
biotype: multiexonic_RefseqNMq1
anchor: TSS

code not run only shown for documentation

bin_values <- arsRtools::load_RNAseq_matrices(deeptools_file_iasillo, deeptools_file_meola)
bin_values_filename <- paste0('../data/RNAseq/RNAseq_bin_values_', biotype, '_', anchor, '.RData')

Saving bin values to file: ../data/RNAseq/RNAseq_bin_values_multiexonic_RefseqNMq1_TSS.RData

code not run only shown for documentation

save(bin_values, file=bin_values_filename)

Load the tidy data frame of single 50bp bin values

everything below is run.

load(bin_values_filename, verbose=TRUE)
## Loading objects:
##   bin_values

heatmaps scaled values

data('RNAseq_value_heatmap_theme', package='arsRtools')
row_order <- bin_values %>%
    group_by(id) %>% 
    summarize(total_value = sum(value)) %>% 
    ungroup %>% 
    arrange(total_value) %$% 
    id

bin_values %>%
  filter(value > 0) %>%
  mutate(id = factor(id, levels=row_order)) %>%
  ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
  geom_raster(interpolate = FALSE) +
  facet_grid(.~siRNA) +
  xlab(paste0('bp to ', biotype, ' ', anchor)) +
  RNAseq_value_heatmap_theme +
  theme(axis.text.y=element_blank())

are there blank lines

total ids considered:

distinct(bin_values, id) %>%
  nrow
## [1] 7233

empty:

empty_ids_TSS <- bin_values %>%
  group_by(id) %>%
  summarize(sum_value = sum(value),
            max_value = max(value)) %>%
  filter(sum_value == 0 & max_value == 0) %$%
  id

length(empty_ids_TSS)
## [1] 704

save empty for later use …

save(empty_ids_TSS, file='../data/RNAseq/Refseq_multi_exonic_NMq1_empty_atTSS_ids.RData')

log2 ratios

bin_sensitivities <- arsRtools::RNAseq_sensitivity_matrix(bin_values)

log2FC profiles

data('RNAseq_lineplot_theme', package='arsRtools')
bin_sensitivities %>%
  group_by(rel_pos, siRNA) %>%
  do(tidy(t.test(log2(.$value)))) %>%
  ggplot(., aes(x=rel_pos, y=estimate, color=siRNA)) +
  geom_ribbon(aes(ymin=conf.low, ymax=conf.high, fill=siRNA, color=NULL), alpha=.5) +
  geom_line(aes(color=siRNA)) +
  ylab('mean log2 fold-change relative control') +
  xlab(paste0('bp to ', biotype, ' ', anchor)) +
  RNAseq_lineplot_theme +
  xlim(-2000,5000)

heatmaps log2FC

data('RNAseq_logFC_heatmap_theme', package='arsRtools')
row_order <- bin_sensitivities %>% 
    group_by(id) %>% 
    summarize(total_value = sum(value)) %>% 
    ungroup %>% 
    arrange(total_value) %$% 
    id
bin_sensitivities %<>%
  mutate(id = factor(id, levels=row_order))
bin_sensitivities %>% 
  mutate(value = case_when(.$value > 4 ~ 4,
                           .$value < .25 ~ .25,
                           TRUE ~ .$value)) %>%
  filter( !(id %in% empty_ids_TSS),
         value > 0,
         rel_pos > -2000, rel_pos < 5000) %>%
  ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
  geom_raster(interpolate = FALSE) +
  facet_grid(.~siRNA) +
  RNAseq_logFC_heatmap_theme +
  theme(axis.text.y=element_blank())

sessionInfo()
## R version 3.2.3 (2015-12-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.6 (El Capitan)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] arsRtools_0.1        RColorBrewer_1.1-2   rtracklayer_1.30.4  
##  [4] GenomicRanges_1.22.4 GenomeInfoDb_1.6.3   IRanges_2.4.8       
##  [7] S4Vectors_0.8.11     BiocGenerics_0.16.1  broom_0.4.1         
## [10] knitr_1.15.1         magrittr_1.5         dplyr_0.5.0         
## [13] purrr_0.2.2          readr_1.0.0          tidyr_0.6.0         
## [16] tibble_1.2           ggplot2_2.2.0        tidyverse_1.0.0     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.8                futile.logger_1.4.3       
##  [3] plyr_1.8.4                 XVector_0.10.0            
##  [5] futile.options_1.0.0       bitops_1.0-6              
##  [7] zlibbioc_1.16.0            tools_3.2.3               
##  [9] digest_0.6.10              evaluate_0.10             
## [11] gtable_0.2.0               nlme_3.1-128              
## [13] lattice_0.20-34            psych_1.6.9               
## [15] DBI_0.5-1                  stringr_1.1.0             
## [17] Biostrings_2.38.4          rprojroot_1.1             
## [19] grid_3.2.3                 Biobase_2.30.0            
## [21] R6_2.2.0                   BiocParallel_1.4.3        
## [23] XML_3.98-1.5               foreign_0.8-67            
## [25] rmarkdown_1.2              lambda.r_1.1.9            
## [27] reshape2_1.4.2             GenomicAlignments_1.6.3   
## [29] Rsamtools_1.22.0           backports_1.0.4           
## [31] scales_0.4.1               htmltools_0.3.5           
## [33] SummarizedExperiment_1.0.2 assertthat_0.1            
## [35] mnormt_1.5-5               colorspace_1.3-2          
## [37] labeling_0.3               stringi_1.1.2             
## [39] RCurl_1.95-4.8             lazyeval_0.2.0            
## [41] munsell_0.4.3